import re
from docx.oxml.text.paragraph import CT_P
from docx.oxml.table import CT_Tbl
from docx.text.paragraph import Paragraph
from docx.table import Table
from docx import Document
from Dagang import Dagang
# Alternate input path (disabled):
# doc = Document("e:\\2.docx")
# Load the source document when the module is executed/imported. The path is a
# hard-coded absolute Windows path, so the script only runs where it exists.
doc = Document("d:\\专业选修课.docx")
# Disabled snippets that would add a heading / a paragraph to the document:
# p=doc.add_heading("《电子商务》教学大纲")
# p=doc.add_paragraph("第一章")


def printchildren(b1, line=None, deep=99):
    """Print the index path and type of every descendant of *b1*.

    Walks the tree depth-first through ``iterchildren()``. For each node it
    prints the list of child indices leading to that node (relative to the
    element the walk started from), followed by ``type(node)``.

    Args:
        b1: Element to walk; must provide an ``iterchildren()`` method.
        line: Index path of *b1* itself, used as the prefix for its
            children's paths. ``None`` (the default) means an empty prefix.
            The argument is copied, never modified.
        deep: Maximum path length to descend to. Children are only recursed
            into while the current path is shorter than this value.
    """
    # Work on a private copy so neither the caller's sequence nor a shared
    # default object is ever mutated.
    path = [] if line is None else list(line)
    path.append(0)
    for i, child in enumerate(b1.iterchildren()):
        path[-1] = i
        print(path, type(child))
        if len(path) < deep:
            printchildren(child, path, deep)


def printdoc(doc):
    """Collect the first syllabus section of *doc* into a ``Dagang`` and save it.

    Iterates over the direct children of ``doc.element.body`` in document
    order. A paragraph whose text matches ``《<title>》课程教学大纲`` with a
    non-empty title starts a section: a ``Dagang(title)`` is created and that
    paragraph, plus every following paragraph and table, is appended to its
    ``content``. Body children that are neither paragraphs nor tables are
    reported on stdout and otherwise ignored.

    When a second matching heading is met while the current section has a
    truthy ``title``, the current section is saved and processing stops.
    If the body ends first, any open section is saved at the end.

    Args:
        doc: Object exposing ``element.body`` whose ``iterchildren()`` yields
            the body elements.
    """
    heading_pattern = re.compile(r'《(?P<title>\w*)》课程教学大纲')
    body = doc.element.body
    current = None

    for index, child in enumerate(body.iterchildren()):
        if isinstance(child, CT_P):
            para = Paragraph(child, body)
            found = heading_pattern.search(para.text)
            title = found.group('title') if found else ''

            if title:
                if current and current.title:
                    # NOTE(review): processing ends at the second heading, so
                    # only the first section is ever saved -- confirm that
                    # this is intended and not a debugging leftover.
                    current.save()
                    return
                current = Dagang(title)

            if current:
                current.content.append(para)
            continue

        if isinstance(child, CT_Tbl):
            table = Table(child, body)
            if current:
                current.content.append(table)
            continue

        # Anything else in the body is only reported.
        print(index, '未处理类型'*5, type(child))

    if current:
        current.save()


# Run the extraction on the module-level `doc` as soon as the module executes.
printdoc(doc)
# Disabled debugging aid: print the text of every paragraph in the document.
# for p in doc.paragraphs:
#     print(p.text)
